- Creado por Hadley Wickham
- Es un set de paquetes de R que evolucionan constantemente.
- El objetivo es permitir una manera de trabajar más “limpia” (“tidy”).
- Trabaja siguiendo los principios de “tidy data”
Alejandra G.Cabanillas
library(palmerpenguins) # Data set used for the examples

# Preview the first rows of the penguins tibble.
head(penguins)
## # A tibble: 6 x 8 ## species island bill_length_mm bill_depth_mm flipper_length_~ body_mass_g sex ## <fct> <fct> <dbl> <dbl> <int> <int> <fct> ## 1 Adelie Torge~ 39.1 18.7 181 3750 male ## 2 Adelie Torge~ 39.5 17.4 186 3800 fema~ ## 3 Adelie Torge~ 40.3 18 195 3250 fema~ ## 4 Adelie Torge~ NA NA NA NA <NA> ## 5 Adelie Torge~ 36.7 19.3 193 3450 fema~ ## 6 Adelie Torge~ 39.3 20.6 190 3650 male ## # ... with 1 more variable: year <int>
Básicamente, un data frame con más información.
Para aprender más sobre por qué se creó “tidyverse”, lee el manifiesto del autor.
Happy families are all alike; every unhappy family is unhappy in its own way — Leo Tolstoy
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6 v purrr 0.3.4 ## v tibble 3.1.6 v dplyr 1.0.8 ## v tidyr 1.2.0 v stringr 1.4.0 ## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag()
# Mean of every numeric column, computed separately for each species.
palmerpenguins::penguins %>%
  group_by(species) %>% # choose the grouping of interest
  # Wrap mean() in an anonymous function instead of passing `na.rm` through
  # across()'s `...`; that form is deprecated in newer dplyr releases, while
  # this one gives the same result on both old and new versions.
  summarize(across(where(is.numeric), function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 x 6 ## species bill_length_mm bill_depth_mm flipper_length_mm body_mass_g year ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 Adelie 38.8 18.3 190. 3701. 2008. ## 2 Chinstrap 48.8 18.4 196. 3733. 2008. ## 3 Gentoo 47.5 15.0 217. 5076. 2008.
# Calculando la media por grupo (especie) para cada variable numérica
Ejemplos:
# Parsing dates and date-times with lubridate.
library(lubridate, warn.conflicts = FALSE)

# Year-month-day supplied as a bare number.
ymd(20101215)
## [1] "2010-12-15"

# Month/day/year supplied as a string with a two-digit year.
mdy("4/1/17")
## [1] "2017-04-01"

# Full date-time; the printed result carries the UTC time zone.
time <- ymd_hms("2010-12-13 15:30:30")
time
## [1] "2010-12-13 15:30:30 UTC"

# Same clock time, relabelled with a different time zone.
force_tz(time, "America/Chicago")
## [1] "2010-12-13 15:30:30 CST"
# Building and comparing durations with lubridate.
library(lubridate, warn.conflicts = FALSE)

# Duration from a number plus a unit name.
duration(1.5, "minutes")
## [1] "90s (~1.5 minutes)"

# Duration parsed from a free-form string mixing several units.
duration("2days 2hours 2mins 2secs")
## [1] "180122s (~2.08 days)"

# Durations can be used with comparison operators; here the right-hand
# side is given as a plain string.
duration("day 2 sec") > "day 1sec"
## [1] TRUE
Aquí podéis leer el capítulo sobre cómo trabajar con fechas y horas en R.
# Scatter plot of body mass against flipper length; colour and point shape
# are both mapped to species.
mass_flipper <- ggplot(
  data = penguins,
  aes(x = flipper_length_mm, y = body_mass_g)
) +
  geom_point(aes(color = species, shape = species), size = 3, alpha = 0.8) +
  theme_minimal() +
  scale_color_manual(values = c("darkorange", "purple", "cyan4")) +
  labs(
    title = "Penguin size, Palmer Station LTER",
    subtitle = "Flipper length and body mass for Adelie, Chinstrap and Gentoo Penguins",
    x = "Flipper length (mm)",
    y = "Body mass (g)",
    # Same title for both legends so colour and shape share one legend entry
    # set rather than being labelled differently.
    color = "Penguin species",
    shape = "Penguin species"
  ) +
  theme(
    legend.position = c(0.2, 0.7),
    legend.background = element_rect(fill = "white", color = NA),
    plot.title.position = "plot",
    plot.caption = element_text(hjust = 0, face = "italic"),
    plot.caption.position = "plot"
  )

# Print the plot as a separate statement.
mass_flipper
## `geom_smooth()` using formula 'y ~ x'
# Scatter plot of bill depth against bill length with one linear fit per
# species (no confidence band).
# NOTE(review): the original chain ended in a dangling `+`, so any further
# layers (theme, scales, labels) are not visible here. The chain is
# terminated at the last visible layer so the statement parses on its own.
bill_len_dep <- ggplot(
  data = penguins,
  aes(x = bill_length_mm, y = bill_depth_mm, group = species)
) +
  geom_point(aes(color = species, shape = species), size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, aes(color = species))

# Print the plot, as the sibling plot chunks do.
bill_len_dep
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overlaid, semi-transparent histograms of body mass, one per species.
bmass_hist <- ggplot(data = penguins, aes(x = body_mass_g)) +
  # position = "identity" draws the species on top of each other instead of
  # stacking them; alpha keeps the overlapping bars visible.
  geom_histogram(aes(fill = species), alpha = 0.5, position = "identity") +
  scale_fill_manual(values = c("darkorange", "purple", "cyan4")) +
  theme_minimal() +
  labs(x = "Body mass (g)", y = "Frequency", title = "Penguin body mass")

# Print the plot; the object is `bmass_hist` (the original referenced an
# undefined `mass_hist`).
bmass_hist
library(ggdist) # adds uncertainty visualizations to ggplot2

# Sets the session-wide default ggplot2 theme.
theme_set(theme_classic())

# Raincloud plot of the bill ratio (bill length / bill depth) per species.
penguins %>%
  # Calculate the bill ratio and per-species summary statistics.
  mutate(bill_ratio = bill_length_mm / bill_depth_mm) %>%
  filter(!is.na(bill_ratio)) %>%
  group_by(species) %>%
  mutate(
    n = n(),
    median = median(bill_ratio),
    max = max(bill_ratio)
  ) %>%
  ungroup() %>%
  # Numeric y position for each species, taken from the reversed factor
  # levels; the y-axis labels further down are written in that same order.
  mutate(species_num = as.numeric(fct_rev(species))) %>%
  ggplot(aes(bill_ratio, species_num, color = species)) +
  # Dotted horizontal guide running from the left edge (-Inf) to the median.
  stat_summary(
    geom = "linerange",
    fun.min = function(x) -Inf,
    fun.max = function(x) median(x, na.rm = TRUE),
    linetype = "dotted",
    orientation = "y",
    size = .7
  ) +
  # Individual observations drawn as "|" marks slightly below each baseline.
  geom_point(
    aes(y = species_num - .15),
    shape = "|",
    size = 5,
    alpha = .33
  ) +
  # Half-eye density; the fill is a lightened version of the outline colour.
  ggdist::stat_halfeye(
    aes(
      y = species_num,
      color = species,
      fill = after_scale(colorspace::lighten(color, .5))
    ),
    shape = 18,
    point_size = 3,
    interval_size = 1.8,
    adjust = .5,
    .width = c(0, 1)
  ) +
  # Median label; stat = "unique" collapses the repeated per-row values.
  geom_text(
    aes(x = median, label = format(round(median, 2), nsmall = 2)),
    stat = "unique",
    color = "white",
    family = "Open Sans",
    fontface = "bold",
    size = 3.4,
    nudge_y = .15
  ) +
  # Sample-size label placed at each species' maximum ratio.
  geom_text(
    aes(x = max, label = glue::glue("n = {n}")),
    stat = "unique",
    family = "Open Sans",
    fontface = "bold",
    size = 3.5,
    hjust = 0,
    nudge_x = .01,
    nudge_y = .02
  ) +
  coord_cartesian(clip = "off", expand = FALSE) +
  scale_x_continuous(
    limits = c(1.6, 3.8),
    breaks = seq(1.6, 3.8, by = .2)
  ) +
  scale_y_continuous(
    limits = c(.55, NA),
    breaks = 1:3,
    labels = c("Gentoo", "Chinstrap", "Adélie")
  ) +
  scale_color_manual(
    values = c("#3d6721", "#a86826", "#006c89"),
    guide = "none"
  ) +
  scale_fill_manual(
    values = c("#3d6721", "#a86826", "#006c89"),
    guide = "none"
  ) +
  labs(
    x = "Bill ratio",
    y = NULL,
    # Built with paste0() so the subtitle is a single line of text; the
    # original literal was broken across two lines after "B. ".
    subtitle = paste0(
      "B. Raincloud plot showing the distribution of bill ratios, ",
      "estimated as bill length divided by bill depth."
    ),
    caption = "Data: Gorman, Williams & Fraser (2014) *PLoS ONE* • Illustration: Allison Horst"
  ) +
  theme(
    panel.grid.major.x = element_line(size = .35),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 13),
    axis.ticks.length = unit(0, "lines"),
    plot.title.position = "plot",
    plot.subtitle = element_text(margin = margin(t = 5, b = 10)),
    plot.margin = margin(10, 25, 10, 25)
  )